
*********************************************************************************
** PRELIMINARIES
*********************************************************************************

********************************************************************
** WAVE 1 POST-PLANTING DATA
********************************************************************

** Start from the household roster: one row per (hhid, indiv)
use "$dir/rawdata/LSMS Nigeria/2010/Post Planting Wave 1/Household/sect1_plantingw1.dta", clear // ROSTER

** Store the roster row count; it is compared against the final row count after the parent merges
count
local samplesize = r(N)

** Attach the other individual-level sections.
** assert(master matched) stops the do-file if a using file contains a person absent from the roster.
merge 1:1 hhid indiv using "$dir/rawdata/LSMS Nigeria/2010/Post Planting Wave 1/Household/sect2_plantingw1.dta", assert(master matched) generate(merge_sec2) // EDUCATION
merge 1:1 hhid indiv using "$dir/rawdata/LSMS Nigeria/2010/Post Planting Wave 1/Household/sect3_plantingw1.dta", assert(master matched) generate(merge_sec3) // LABOR

** In HH: 1 unless s1q7==2 (so a missing s1q7 is counted as in the household)
gen w1p_inhh = (s1q7 != 2)

** Household-level head counts.
** Each count sums a 0/1 condition over the rows of a household; only rows with w1p_inhh==1 contribute.
** A missing age fails every age condition below, and age code 999 is explicitly excluded
** from the upper age bands.

** Household size
bysort hhid: egen w1p_hh_size = total(w1p_inhh == 1)

** Number of children 12 or younger
bysort hhid: egen w1p_hh_under12 = total(s1q4 <= 12 & w1p_inhh == 1)

** Number of youth 18 or younger
bysort hhid: egen w1p_hh_under18 = total(s1q4 <= 18 & w1p_inhh == 1)

** Number of adults 60 or older
bysort hhid: egen w1p_hh_over60 = total(s1q4 >= 60 & !missing(s1q4) & s1q4 != 999 & w1p_inhh == 1)

** Adults 12 to 60 (strictly older than 12, strictly younger than 60)
bysort hhid: egen w1p_hh_12to60 = total(s1q4 > 12 & s1q4 < 60 & !missing(s1q4) & s1q4 != 999 & w1p_inhh == 1)

** Marital status: s1q8 codes 1 and 2 are the two married categories (monogamous / polygamous).
** All three indicators stay missing when s1q8 is missing.
gen w1p_married       = inlist(s1q8, 1, 2) if !missing(s1q8)
gen w1p_married_monog = (s1q8 == 1)        if !missing(s1q8)
gen w1p_married_polyg = (s1q8 == 2)        if !missing(s1q8)

** Female: s1q2==2, missing when s1q2 is missing
gen w1p_female = cond(missing(s1q2), ., s1q2 == 2)

** Age: copy of s1q4 with the 999 code turned into missing
gen w1p_age = s1q4 if s1q4 != 999

** Ages of all household members, spread across columns
** age_person`x' holds, on every row of a household, the age of the member whose indiv==`x'
** (missing if the household has no such member or that member's age is unknown).
** Built from w1p_age rather than raw s1q4 so that the 999 "unknown age" code is not
** averaged into w1p_hh_avgage or reported as a spouse's age.
sum indiv
local max_indiv = r(max)
forvalues x=1/`max_indiv' {
	gen age_person`x'_temp=w1p_age if indiv==`x'
	bys hhid: egen age_person`x'=max(age_person`x'_temp)
	drop age_person`x'_temp
}

** Average age of others in the household
** Blank out each person's own column, then average the remaining columns.
** NOTE(review): members with w1p_inhh==0 are included in this average - confirm that is intended.
forvalues x=1/`max_indiv' {
	replace age_person`x'=. if indiv==`x'
}
egen w1p_hh_avgage=rowmean(age_person*)

** Spouse age
** Keep only the column whose index equals s1q11 (used here as the spouse's indiv number);
** rowmean over the single surviving column returns that person's age.
forvalues x=1/`max_indiv' {
	replace age_person`x'=. if s1q11!=`x'
}
egen w1p_spouse_age=rowmean(age_person*)

** Age gap (own age minus spouse age)
gen w1p_spouse_agegap=w1p_age-w1p_spouse_age

** Birthday as a Stata daily date (mdy() returns missing when any component is invalid)
gen w1p_birthday = mdy(s1q5_month, s1q5_day, s1q5_year)
format %td w1p_birthday

** Birth year: reported year, with codes 9998 and 9999 set to missing;
** when unavailable, fall back to 2010 minus age
gen w1p_birthyear = s1q5_year if !inlist(s1q5_year, 9998, 9999)
replace w1p_birthyear = 2010 - w1p_age if missing(w1p_birthyear)

** Highest education of individual, in order of precedence:
**   s2q7==98            -> missing
**   s2q4==2             -> 0 (never attended school)
**   anything else       -> s2q7 as reported
gen w1p_educ = cond(s2q7 == 98, ., cond(s2q4 == 2, 0, s2q7))
label values w1p_educ s2q7

** Still in school (missing when s2q9 is missing)
gen w1p_inschool = cond(missing(s2q9), ., s2q9 == 1)

** Literate (missing when s2q3 is missing)
gen w1p_literate = cond(missing(s2q3), ., s2q3 == 1)

** Three work-type indicators for the last 7 days; each is missing when its source item is missing
** Worked for a non-HH member
gen w1p_work_outsideHH = cond(missing(s3q4), ., s3q4 == 1)

** Worked in agriculture for a HH member
gen w1p_work_agHH = cond(missing(s3q5), ., s3q5 == 1)

** Worked in HH business
gen w1p_work_bizHH = cond(missing(s3q6), ., s3q6 == 1)

** Any work in last 7 days (agriculture, for non-HH member, own account/business)
** 1 if any of the three items equals 1, 0 otherwise; missing only when all three items are missing
gen w1p_worked_last7days = (s3q4 == 1 | s3q5 == 1 | s3q6 == 1) if !(missing(s3q4) & missing(s3q5) & missing(s3q6))

** Hours worked in last 7 days
** rowtotal() counts a missing component as zero; the result is left missing where work status is unknown
egen w1p_work_hrs = rowtotal(s3q18 s3q30) if !missing(w1p_worked_last7days)

** Works in agriculture: s3q14 or s3q26 equals 1, defined when at least one of them is reported;
** forced to 0 for anyone who did not work in the last 7 days
gen w1p_work_agri = (s3q14 == 1 | s3q26 == 1) if !(missing(s3q14) & missing(s3q26))
replace w1p_work_agri = 0 if w1p_worked_last7days == 0

** Industry of occupation
gen w1p_occ = s3q14
label values w1p_occ s3q14

** Roster-reported characteristics of the biological parents.
** Source items: father education s1q16, father occupation s1q17,
**               mother education s1q21, mother occupation s1q22.
local src_father_educ s1q16
local src_father_occ  s1q17
local src_mother_educ s1q21
local src_mother_occ  s1q22

foreach parent in father mother {

	** Highest education of biological `parent' (code 98 set to missing)
	gen w1p_`parent'_educ = `src_`parent'_educ' if `src_`parent'_educ' != 98
	label values w1p_`parent'_educ `src_`parent'_educ'

	** Industry of occupation of biological `parent'
	gen w1p_`parent'_occ = `src_`parent'_occ'
	label values w1p_`parent'_occ `src_`parent'_occ'

}

** ID of father and mother (for linking)
gen w1p_father_id = s1q14
gen w1p_mother_id = s1q19

********************************************************************
** CONNECT PARENT INFORMATION
********************************************************************

** PREPARE FOR MERGING
** Keep identifiers, geography and the constructed w1p_* variables only
keep zone state lga sector ea hhid indiv w1p_*

** #1. Full dataset
tempfile data
save `data', replace

** #2-#3. One lookup file per parent (tempfiles `fathers' and `mothers').
** Each holds every person's own characteristics, suffixed _father_mrg / _mother_mrg,
** and keyed on hhid plus the person's indiv renamed to w1p_father_id / w1p_mother_id.
foreach parent in father mother {
	use `data', clear
	keep hhid indiv w1p_female w1p_birthyear w1p_age w1p_literate w1p_educ w1p_occ
	ren w1p_* w1p_*_`parent'_mrg
	ren indiv w1p_`parent'_id
	tempfile `parent's
	save ``parent's', replace
}

** MERGE
** Attach to each person the record of the household member named as father / mother.
** keep(master matched) discards lookup rows that nobody points to, so the row count cannot grow.
use `data', clear
foreach parent in father mother {
	merge m:1 hhid w1p_`parent'_id using ``parent's', gen(merge_`parent's) keep(master matched)
}

** The sample must still be exactly the roster
count
assert r(N) == `samplesize'

** CLEAN UP
** Mismatched merges
** A link is treated as wrong when the linked person's sex contradicts the parent role.
** Each parent is flagged and cleaned separately, so a bad father link no longer
** erases a valid mother link (and vice versa).
gen merge_flag_father=1 if w1p_female_father_mrg==1 // Merged fathers coded as female
gen merge_flag_mother=1 if w1p_female_mother_mrg==0 // Merged mothers coded as male
foreach parent in father mother {
	foreach var of varlist *_`parent'_mrg {
		quietly replace `var'=. if merge_flag_`parent'==1
	}
}
 
** FILL IN PARENT INFORMATION
** Age, birth year and literacy come only from the linked parent's own record.
** Education and occupation keep the value already in w1p_`parent'_educ / w1p_`parent'_occ
** and use the linked record only where that value is missing.
foreach parent in father mother {
	quietly {

		** Age, birth year, literacy of the linked parent
		gen w1p_`parent'_age       = w1p_age_`parent'_mrg
		gen w1p_`parent'_birthyear = w1p_birthyear_`parent'_mrg
		gen w1p_`parent'_literate  = w1p_literate_`parent'_mrg

		** Place the three new variables, in this order, immediately before the education variable
		order w1p_`parent'_age w1p_`parent'_birthyear w1p_`parent'_literate, before(w1p_`parent'_educ)

		** Education: fill gaps only
		replace w1p_`parent'_educ = w1p_educ_`parent'_mrg if missing(w1p_`parent'_educ)

		** Occupation: fill gaps only
		replace w1p_`parent'_occ = w1p_occ_`parent'_mrg if missing(w1p_`parent'_occ)

	}
}

** Remove merge indicators/flags, the merged helper columns and the linking IDs
drop merge_* *_mrg *father_id *mother_id

**************************************************************
** CONSTRUCT VARIABLES 
**************************************************************

** GEOGRAPHIC INFORMATION
** Urban indicator is sector==1, missing when sector is missing
gen w1p_geo_urban = cond(missing(sector), ., sector == 1)
gen w1p_geo_state = state
gen w1p_geo_lga   = lga
label variable w1p_geo_urban "Urban residence"
label variable w1p_geo_state "State of residence"
label variable w1p_geo_lga "LGA of residence"

** Household-level consumption aggregate file: rows without a match there are kept
merge m:1 hhid using "$dir/rawdata/LSMS Nigeria/2010/cons_agg_wave1_visit2.dta", keep(master matched) nogen

** Urban indicator as recorded in the consumption file (rururb==1), placed right after its source
gen w1p_urban_fromcons = cond(missing(rururb), ., rururb == 1)
label variable w1p_urban_fromcons "Urban residence"
order w1p_urban_fromcons, after(rururb)

** Household size as recorded in the consumption file
rename hhsize w1p_hhsize_fromcons

** Final file: identifiers plus every variable whose name starts with w1, geography first
keep hhid indiv w1*
order hhid indiv w1p_geo*
save "$dir/data/NGA_w1p_individual.dta", replace
